#Imports
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import time
from fredapi import Fred
import pyodbc
from sqlalchemy import create_engine
from mpl_toolkits.mplot3d import Axes3D
from sklearn.linear_model import LinearRegression
##plot styles
plt.style.use('fivethirtyeight')
pd.set_option('max_columns', 500)
color_pal = plt.rcParams['axes.prop_cycle'].by_key()['color']
matplotlib.rcParams['figure.figsize'] = (12, 8)
#Connections
fred_key = '041a29909d1c85d5241bfe1c2eeba2a8'
#export to sql
connection_string = 'DRIVER={SQL Server};SERVER=DESKTOP-SHVFG6K\SQLEXPRESS;DATABASE=inflation_project;Trusted_Connection=yes;'
engine = create_engine(f"mssql+pyodbc:///?odbc_connect={connection_string}")
#import from sql
conn = pyodbc.connect(
"Driver={SQL Server};"
"Server=DESKTOP-SHVFG6K\SQLEXPRESS;"
"Database=inflation_project;"
"Trusted_Connection=yes;"
)
fred = Fred(api_key = fred_key)
fred.search('Corporate Profits', order_by='popularity')
| id | realtime_start | realtime_end | title | observation_start | observation_end | frequency | frequency_short | units | units_short | seasonal_adjustment | seasonal_adjustment_short | last_updated | popularity | notes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| series id | |||||||||||||||
| CP | CP | 2023-06-20 | 2023-06-20 | Corporate Profits After Tax (without IVA and C... | 1947-01-01 | 2023-01-01 | Quarterly | Q | Billions of Dollars | Bil. of $ | Seasonally Adjusted Annual Rate | SAAR | 2023-05-25 07:53:13-05:00 | 71 | BEA Account Code: A055RC |
| PI | PI | 2023-06-20 | 2023-06-20 | Personal Income | 1959-01-01 | 2023-04-01 | Monthly | M | Billions of Dollars | Bil. of $ | Seasonally Adjusted Annual Rate | SAAR | 2023-05-26 07:43:16-05:00 | 62 | BEA Account Code: A065RC Personal income is th... |
| A053RC1Q027SBEA | A053RC1Q027SBEA | 2023-06-20 | 2023-06-20 | National income: Corporate profits before tax ... | 1947-01-01 | 2023-01-01 | Quarterly | Q | Billions of Dollars | Bil. of $ | Seasonally Adjusted Annual Rate | SAAR | 2023-05-25 07:53:17-05:00 | 55 | BEA Account Code: A053RC For more information... |
| BOGZ1FL153064486Q | BOGZ1FL153064486Q | 2023-06-20 | 2023-06-20 | Households and Nonprofit Organizations; Direct... | 1945-10-01 | 2023-01-01 | Quarterly | Q | Percent | % | Not Seasonally Adjusted | NSA | 2023-06-08 15:18:04-05:00 | 54 | Source ID: FL153064486.Q For more information... |
| QUSPAM770A | QUSPAM770A | 2023-06-20 | 2023-06-20 | Total Credit to Private Non-Financial Sector, ... | 1947-10-01 | 2022-10-01 | Quarterly, End of Quarter | Q | Percentage of GDP | Percentage of GDP | Not Seasonally Adjusted | NSA | 2023-06-12 13:01:02-05:00 | 50 | Credit is provided by domestic banks, all othe... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| N513RC0A144NBEA | N513RC0A144NBEA | 2023-06-20 | 2023-06-20 | Corporate profits before tax: Domestic industr... | 1998-01-01 | 2019-01-01 | Annual | A | Millions of Dollars | Mil. of $ | Not Seasonally Adjusted | NSA | 2022-10-12 15:38:06-05:00 | 1 | BEA Account Code: N513RC For more information... |
| CRDQCABPABIS | CRDQCABPABIS | 2023-06-20 | 2023-06-20 | Credit to Private Non-Financial Sector by Dome... | 1954-01-01 | 2022-10-01 | Quarterly, End of Quarter | Q | Billions of Canadian Dollars | Bil. of Canadian $ | Not Seasonally Adjusted | NSA | 2023-06-12 13:03:15-05:00 | 1 | Credit is provided by domestic banks, all othe... |
| CRDQHKAHABIS | CRDQHKAHABIS | 2023-06-20 | 2023-06-20 | Total Credit to Households and Non-Profit Inst... | 1990-10-01 | 2022-10-01 | Quarterly, End of Quarter | Q | Billions of Hong Kong Dollars | Bil. of Hong Kong $ | Not Seasonally Adjusted | NSA | 2023-06-12 13:03:08-05:00 | 1 | Credit is provided by domestic banks, all othe... |
| N415RC1A027NBEA | N415RC1A027NBEA | 2023-06-20 | 2023-06-20 | Corporate profits with inventory valuation adj... | 1998-01-01 | 2022-01-01 | Annual | A | Billions of Dollars | Bil. of $ | Not Seasonally Adjusted | NSA | 2023-03-30 07:56:01-05:00 | 1 | BEA Account Code: N415RC For more information... |
| QBEGAMUSDA | QBEGAMUSDA | 2023-06-20 | 2023-06-20 | Total Credit to General Government, Adjusted f... | 1998-10-01 | 2022-10-01 | Quarterly, End of Quarter | Q | Billions of US Dollars | Bil. of US $ | Not Seasonally Adjusted | NSA | 2023-06-12 13:02:53-05:00 | 1 | Credit is provided by domestic banks, all othe... |
1000 rows × 15 columns
# create corporate profits datafram
corp_profits = fred.get_series(series_id = 'CP')
corp_profits_df = pd.DataFrame(corp_profits, columns=['corp_profits_billions'])
corp_profits_df = corp_profits_df.reset_index()
corp_profits_df = corp_profits_df.rename(columns={'index': 'Date'})
corp_profits_df
| Date | corp_profits_billions | |
|---|---|---|
| 0 | 1946-01-01 | NaN |
| 1 | 1946-04-01 | NaN |
| 2 | 1946-07-01 | NaN |
| 3 | 1946-10-01 | NaN |
| 4 | 1947-01-01 | 21.970 |
| ... | ... | ... |
| 304 | 2022-01-01 | 2833.431 |
| 305 | 2022-04-01 | 3043.114 |
| 306 | 2022-07-01 | 2890.812 |
| 307 | 2022-10-01 | 2721.294 |
| 308 | 2023-01-01 | 2663.671 |
309 rows × 2 columns
# Export corporate profits data to mssql
# corp_profits_df.to_sql('corp_profits', engine, index=False, if_exists='replace')
# engine.dispose()
plt.title("Corporate Profits After Tax")
plt.xlabel("Date")
plt.ylabel("Billions of Dollars")
corp_profits.plot(figsize=(10, 5))
<AxesSubplot:title={'center':'Corporate Profits After Tax'}, xlabel='Date', ylabel='Billions of Dollars'>
fred.search('Consumer Price Index for All Urban Consumers', order_by = 'popularity')
| id | realtime_start | realtime_end | title | observation_start | observation_end | frequency | frequency_short | units | units_short | seasonal_adjustment | seasonal_adjustment_short | last_updated | popularity | notes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| series id | |||||||||||||||
| CPIAUCSL | CPIAUCSL | 2023-06-20 | 2023-06-20 | Consumer Price Index for All Urban Consumers: ... | 1947-01-01 | 2023-05-01 | Monthly | M | Index 1982-1984=100 | Index 1982-1984=100 | Seasonally Adjusted | SA | 2023-06-13 07:44:03-05:00 | 94 | The Consumer Price Index for All Urban Consume... |
| CUSR0000SETA02 | CUSR0000SETA02 | 2023-06-20 | 2023-06-20 | Consumer Price Index for All Urban Consumers: ... | 1953-01-01 | 2023-05-01 | Monthly | M | Index 1982-1984=100 | Index 1982-1984=100 | Seasonally Adjusted | SA | 2023-06-13 07:43:02-05:00 | 82 | None |
| CPILFESL | CPILFESL | 2023-06-20 | 2023-06-20 | Consumer Price Index for All Urban Consumers: ... | 1957-01-01 | 2023-05-01 | Monthly | M | Index 1982-1984=100 | Index 1982-1984=100 | Seasonally Adjusted | SA | 2023-06-13 07:44:03-05:00 | 82 | The "Consumer Price Index for All Urban Consum... |
| APU0000708111 | APU0000708111 | 2023-06-20 | 2023-06-20 | Average Price: Eggs, Grade A, Large (Cost per ... | 1980-01-01 | 2023-05-01 | Monthly | M | U.S. Dollars | U.S. $ | Not Seasonally Adjusted | NSA | 2023-06-13 07:37:01-05:00 | 80 | Large white, Grade A chicken eggs, sold in a c... |
| PCEPI | PCEPI | 2023-06-20 | 2023-06-20 | Personal Consumption Expenditures: Chain-type ... | 1959-01-01 | 2023-04-01 | Monthly | M | Index 2012=100 | Index 2012=100 | Seasonally Adjusted | SA | 2023-05-26 07:43:07-05:00 | 78 | BEA Account Code: DPCERG The Personal Consump... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| CUUSA101SAT | CUUSA101SAT | 2023-06-20 | 2023-06-20 | Consumer Price Index for All Urban Consumers: ... | 1984-01-01 | 2022-07-01 | Semiannual | SA | Index 1982-1984=100 | Index 1982-1984=100 | Not Seasonally Adjusted | NSA | 2023-01-12 07:40:56-06:00 | 1 | None |
| CUSR0000SERF03 | CUSR0000SERF03 | 2023-06-20 | 2023-06-20 | Consumer Price Index for All Urban Consumers: ... | 1989-01-01 | 2023-05-01 | Monthly | M | Index Dec 1986=100 | Index Dec 1986=100 | Seasonally Adjusted | SA | 2023-06-13 07:44:44-05:00 | 1 | None |
| CUURA102SEHC01 | CUURA102SEHC01 | 2023-06-20 | 2023-06-20 | Consumer Price Index for All Urban Consumers: ... | 1982-12-01 | 2023-05-01 | Monthly | M | Index Dec 1982=100 | Index Dec 1982=100 | Not Seasonally Adjusted | NSA | 2023-06-13 07:44:24-05:00 | 1 | None |
| CUUSA213SAT | CUUSA213SAT | 2023-06-20 | 2023-06-20 | Consumer Price Index for All Urban Consumers: ... | 1984-01-01 | 2017-01-01 | Annual | A | Index 1982-1984=100 | Index 1982-1984=100 | Not Seasonally Adjusted | NSA | 2018-01-12 09:14:03-06:00 | 1 | This series was discontinued as a result of th... |
| CUUSA213SEHA | CUUSA213SEHA | 2023-06-20 | 2023-06-20 | Consumer Price Index for All Urban Consumers: ... | 1984-01-01 | 2017-01-01 | Annual | A | Index 1982-1984=100 | Index 1982-1984=100 | Not Seasonally Adjusted | NSA | 2018-01-12 09:12:25-06:00 | 1 | This series was discontinued as a result of th... |
1000 rows × 15 columns
# create cpi all data
cpi_all = fred.get_series(series_id = 'CPIAUCSL')
cpi_all_df = pd.DataFrame(cpi_all, columns=['CPI_all'])
cpi_all_df = cpi_all_df.reset_index()
cpi_all_df = cpi_all_df.rename(columns={'index': 'Date'})
cpi_all_df
| Date | CPI_all | |
|---|---|---|
| 0 | 1947-01-01 | 21.480 |
| 1 | 1947-02-01 | 21.620 |
| 2 | 1947-03-01 | 22.000 |
| 3 | 1947-04-01 | 22.000 |
| 4 | 1947-05-01 | 21.950 |
| ... | ... | ... |
| 912 | 2023-01-01 | 300.536 |
| 913 | 2023-02-01 | 301.648 |
| 914 | 2023-03-01 | 301.808 |
| 915 | 2023-04-01 | 302.918 |
| 916 | 2023-05-01 | 303.294 |
917 rows × 2 columns
# Export cpi all data to mssql
# cpi_all_df.to_sql('cpi_all', engine, index=False, if_exists='replace')
# engine.dispose()
plt.title("CPI Total")
plt.xlabel("Date")
plt.ylabel("Index")
cpi_all.plot(figsize=(10, 5))
<AxesSubplot:title={'center':'CPI Total'}, xlabel='Date', ylabel='Index'>
# create gas cpi dataframe
cpi_gas = fred.get_series(series_id = 'CUSR0000SETB01')
cpi_gas_df = pd.DataFrame(cpi_gas, columns=['CPI_gas'])
cpi_gas_df = cpi_gas_df.reset_index()
cpi_gas_df = cpi_gas_df.rename(columns={'index': 'Date'})
cpi_gas_df
| Date | CPI_gas | |
|---|---|---|
| 0 | 1967-01-01 | 26.300 |
| 1 | 1967-02-01 | 26.200 |
| 2 | 1967-03-01 | 26.100 |
| 3 | 1967-04-01 | 26.400 |
| 4 | 1967-05-01 | 26.400 |
| ... | ... | ... |
| 672 | 2023-01-01 | 315.662 |
| 673 | 2023-02-01 | 318.945 |
| 674 | 2023-03-01 | 304.332 |
| 675 | 2023-04-01 | 313.479 |
| 676 | 2023-05-01 | 295.962 |
677 rows × 2 columns
# Export cpi gas data to mssql
# cpi_gas_df.to_sql('cpi_gas', engine, index=False, if_exists='replace')
# engine.dispose()
plt.title("CPI Gas")
plt.xlabel("Date")
plt.ylabel("Index")
cpi_gas.plot(figsize=(10, 5))
<AxesSubplot:title={'center':'CPI Gas'}, xlabel='Date', ylabel='Index'>
# Create CPI meats, poultry, fish and eggs dataframe
cpi_meats_poultry_fish_eggs = fred.get_series(series_id = 'CUSR0000SETB01')
cpi_meats_poultry_fish_eggs_df = pd.DataFrame(cpi_meats_poultry_fish_eggs, columns=['CPI_mpfe'])
cpi_meats_poultry_fish_eggs_df = cpi_meats_poultry_fish_eggs_df.reset_index()
cpi_meats_poultry_fish_eggs_df = cpi_meats_poultry_fish_eggs_df.rename(columns={'index': 'Date'})
cpi_meats_poultry_fish_eggs_df
| Date | CPI_mpfe | |
|---|---|---|
| 0 | 1967-01-01 | 26.300 |
| 1 | 1967-02-01 | 26.200 |
| 2 | 1967-03-01 | 26.100 |
| 3 | 1967-04-01 | 26.400 |
| 4 | 1967-05-01 | 26.400 |
| ... | ... | ... |
| 672 | 2023-01-01 | 315.662 |
| 673 | 2023-02-01 | 318.945 |
| 674 | 2023-03-01 | 304.332 |
| 675 | 2023-04-01 | 313.479 |
| 676 | 2023-05-01 | 295.962 |
677 rows × 2 columns
# cpi_meats_poultry_fish_eggs_df.to_sql('cpi_meats_poultry_fish_eggs', engine, index=False, if_exists='replace', method='multi')
# engine.dispose()
plt.title("CPI Meats, Poultry, Fish, Eggs")
plt.xlabel("Date")
plt.ylabel("Index")
cpi_meats_poultry_fish_eggs.plot(figsize=(10, 5))
<AxesSubplot:title={'center':'CPI Meats, Poultry, Fish, Eggs'}, xlabel='Date', ylabel='Index'>
# get merged table from mssql
query = "SELECT * FROM inflation_data"
inflation_data_df = pd.read_sql(query, conn)
conn.close()
inflation_data_df.head()
| Date | CPI_all | CPI_gas | CPI_mpfe | corp_profits_billions | CALM_gross_profits_millions | XOM_gross_profits_millions | avg_egg_price_dozen | avg_price_gas_gallon | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1947-01-01 | 21.48 | NaN | NaN | 21.970 | NaN | NaN | NaN | NaN |
| 1 | 1947-02-01 | 21.62 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 1947-03-01 | 22.00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 1947-04-01 | 22.00 | NaN | NaN | 20.788 | NaN | NaN | NaN | NaN |
| 4 | 1947-05-01 | 21.95 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# correlation between metrics
correlation_matrix = inflation_data_df.corr(method= 'pearson')
sns.heatmap(correlation_matrix, annot=True)
plt.title('Correlation Matrix')
plt.show()
corp_profits_filtered = corp_profits_df[(corp_profits_df['Date'] >= '2020') & (corp_profits_df['Date'] <= '2023')]
cpi_all_filtered = cpi_all_df[(cpi_all_df['Date'] >= '2020') & (cpi_all_df['Date'] <= '2023')]
fig = px.line()
fig.add_trace(go.Scatter(x=corp_profits_filtered['Date'], y=corp_profits_filtered['corp_profits_billions'], name='Corporate Profits'))
fig.add_trace(go.Scatter(x=cpi_all_filtered['Date'], y=cpi_all_filtered['CPI_all'], name='Total CPI', yaxis='y2'))
fig.update_layout(yaxis=dict(title='Billions of Dollars'), yaxis2=dict(title='Index', side='right', overlaying='y'), legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))
fig.update_yaxes(range=[corp_profits_filtered['corp_profits_billions'].min(), corp_profits_filtered['corp_profits_billions'].max()], secondary_y=False)
fig.update_yaxes(range=[cpi_all_filtered['CPI_all'].min(), cpi_all_filtered['CPI_all'].max()], secondary_y=True)
fig.update_layout(title='Corporate Profits vs Total CPI', xaxis_title='Date')
fig.show()
merged_df = corp_profits_filtered.merge(cpi_all_filtered, on='Date')
sns.regplot(y=merged_df['corp_profits_billions'], x=merged_df['CPI_all'], scatter_kws={'color': 'red'}, line_kws={'color': 'blue'})
plt.title('Regression Plot - Total CPI vs Corporate Profits')
plt.ylabel('Billions of Dollars')
plt.xlabel('Index')
plt.show()
fig = make_subplots(rows=2, cols=1, shared_xaxes=True)
cpi_gas_filtered = inflation_data_df[(inflation_data_df['Date'] >= '2020') & (inflation_data_df['Date'] <= '2023')]
gas_price_filtered = inflation_data_df[(inflation_data_df['Date'] >= '2020') & (inflation_data_df['Date'] <= '2023')]
xom_filtered = inflation_data_df[(inflation_data_df['Date'] >= '2020') & (inflation_data_df['Date'] <= '2023')]
xom_filtered = xom_filtered.dropna(subset=['XOM_gross_profits_millions'])
fig = px.line()
fig.add_trace(go.Scatter(x=cpi_gas_filtered['Date'], y=cpi_gas_filtered['CPI_gas'], name='CPI Gas'))
fig.add_trace(go.Scatter(x=gas_price_filtered['Date'], y=gas_price_filtered['avg_price_gas_gallon'], name='Average Price per Gallon', yaxis='y2'))
fig.add_trace(go.Scatter(x=xom_filtered['Date'], y=xom_filtered['XOM_gross_profits_millions'], name='Exxon Mobil Gross Profits', yaxis='y3'))
fig.update_layout(xaxis=dict(domain=[0, 0.92]),
yaxis=dict(title='Index'),
yaxis2=dict(title='Price (Dollars)', side='right', overlaying='y', anchor='x'),
yaxis3=dict(title='Millions of Dollars', side='right', overlaying='y', position=1),
legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))
fig.update_yaxes(range=[cpi_gas_filtered['CPI_gas'].min(), cpi_gas_filtered['CPI_gas'].max()], secondary_y=False)
fig.update_yaxes(range=[gas_price_filtered['avg_price_gas_gallon'].min(), gas_price_filtered['avg_price_gas_gallon'].max()], secondary_y=True)
fig.update_yaxes(range=[xom_filtered['XOM_gross_profits_millions'].min(), xom_filtered['XOM_gross_profits_millions'].max()], secondary_y=True)
fig.update_layout(title='CPI Gas vs Average Price of Gas per Gallon vs Exxon Mobil Gross Profits', xaxis_title='Date')
fig.show()
selected_columns = ['CPI_gas', 'avg_price_gas_gallon', 'XOM_gross_profits_millions']
subset_df = inflation_data_df[selected_columns]
correlation_matrix = subset_df.corr(method= 'pearson')
sns.heatmap(correlation_matrix, annot=True)
plt.title('Correlation Matrix - Gasoline')
plt.show()
sns.regplot(x=inflation_data_df['avg_price_gas_gallon'], y=inflation_data_df['XOM_gross_profits_millions'], scatter_kws={'color': 'red'}, line_kws={'color': 'blue'})
plt.title('Regression Plot - Exxon Mobil Gross Profits vs Average Price of Gas per Gallon')
plt.xlabel('Price (Dollars)')
plt.ylabel('Millions of Dollars')
plt.show()
df = inflation_data_df
df = df.dropna(subset=['XOM_gross_profits_millions'])
# Extract the columns as numpy arrays
CPI_gas = df['CPI_gas'].values
avg_price_gas_gallon = df['avg_price_gas_gallon'].values
XOM_gross_profits_millions = df['XOM_gross_profits_millions'].values
# Prepare the feature matrix
X = np.column_stack((CPI_gas, avg_price_gas_gallon))
# Create an instance of the LinearRegression model
model = LinearRegression()
# Fit the model to the data
model.fit(X, XOM_gross_profits_millions)
# Obtain the coefficients and intercept
coefficients = model.coef_
intercept = model.intercept_
# Create a meshgrid for the features
CPI_gas_plot, avg_price_gas_gallon_plot = np.meshgrid(np.linspace(CPI_gas.min(), CPI_gas.max(), 20),
np.linspace(avg_price_gas_gallon.min(), avg_price_gas_gallon.max(), 20))
# Predict the target variable for the meshgrid points
X_plot = np.column_stack((CPI_gas_plot.ravel(), avg_price_gas_gallon_plot.ravel()))
XOM_gross_profits_millions_plot = model.predict(X_plot)
# Create a 3D plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(CPI_gas, avg_price_gas_gallon, XOM_gross_profits_millions, color='blue', label='Data')
tri = ax.plot_trisurf(CPI_gas, avg_price_gas_gallon, XOM_gross_profits_millions, cmap='viridis', alpha=0.5)
tri.set_edgecolor('k')
ax.set_xlabel('CPI Gas')
ax.set_ylabel('Average Price (Dollars)')
ax.set_zlabel('Exxon Mobil Gross Profits (Millions)')
plt.show()
fig = make_subplots(rows=2, cols=1, shared_xaxes=True)
cpi_mpfe_filtered = inflation_data_df[(inflation_data_df['Date'] >= '2020') & (inflation_data_df['Date'] <= '2023')]
egg_price_filtered = inflation_data_df[(inflation_data_df['Date'] >= '2020') & (inflation_data_df['Date'] <= '2023')]
calm_filtered = inflation_data_df[(inflation_data_df['Date'] >= '2020') & (inflation_data_df['Date'] <= '2023')]
calm_filtered = calm_filtered.dropna(subset=['CALM_gross_profits_millions'])
fig = px.line()
fig.add_trace(go.Scatter(x=cpi_mpfe_filtered['Date'], y=cpi_mpfe_filtered['CPI_mpfe'], name='CPI Meats, Poulty, Fish, Eggs'))
fig.add_trace(go.Scatter(x=egg_price_filtered['Date'], y=egg_price_filtered['avg_egg_price_dozen'], name='Average Price of Eggs per Dozen', yaxis='y2'))
fig.add_trace(go.Scatter(x=calm_filtered['Date'], y=calm_filtered['CALM_gross_profits_millions'], name='Cal-Maine Foods Gross Profits', yaxis='y3'))
fig.update_layout(xaxis=dict(domain=[0, 0.92]),
yaxis=dict(title='Index'),
yaxis2=dict(title='Price (Dollars)', side='right', overlaying='y', anchor='x'),
yaxis3=dict(title='Millions of Dollars', side='right', overlaying='y', position=1),
legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))
fig.update_yaxes(range=[cpi_mpfe_filtered['CPI_mpfe'].min(), cpi_mpfe_filtered['CPI_mpfe'].max()], secondary_y=False)
fig.update_yaxes(range=[gas_price_filtered['avg_egg_price_dozen'].min(), gas_price_filtered['avg_egg_price_dozen'].max()], secondary_y=True)
fig.update_yaxes(range=[xom_filtered['CALM_gross_profits_millions'].min(), xom_filtered['XOM_gross_profits_millions'].max()], secondary_y=True)
fig.update_layout(title='CPI MFPE vs Average Price of Eggs per Dozen vs Cal-Maine Foods Gross Profits', xaxis_title='Date', width=800)
fig.show()
selected_columns = ['CPI_mpfe', 'avg_egg_price_dozen', 'CALM_gross_profits_millions']
subset_df = inflation_data_df[selected_columns]
correlation_matrix = subset_df.corr(method= 'pearson')
sns.heatmap(correlation_matrix, annot=True)
plt.title('Correlation Matrix - Eggs')
plt.show()
sns.regplot(x=inflation_data_df['avg_egg_price_dozen'], y=inflation_data_df['CALM_gross_profits_millions'], scatter_kws={'color': 'red'}, line_kws={'color': 'blue'})
plt.title('Regression Plot - Cal-Maine Foods Profits vs Price of Eggs per Dozen')
plt.xlabel('Average Price of Eggs per Dozen')
plt.ylabel('Millions of Dollars')
plt.show()
df = inflation_data_df
df = df.dropna(subset=['CALM_gross_profits_millions'])
# Extract the columns as numpy arrays
CPI_mpfe = df['CPI_mpfe'].values
avg_egg_price_dozen = df['avg_egg_price_dozen'].values
CALM_gross_profits_millions = df['CALM_gross_profits_millions'].values
# Prepare the feature matrix
X = np.column_stack((CPI_mpfe, avg_egg_price_dozen))
# Create an instance of the LinearRegression model
model = LinearRegression()
# Fit the model to the data
model.fit(X, CALM_gross_profits_millions)
# Obtain the coefficients and intercept
coefficients = model.coef_
intercept = model.intercept_
# Create a meshgrid for the features
CPI_mpfe_plot, avg_egg_price_dozen_plot = np.meshgrid(np.linspace(CPI_mpfe.min(), CPI_mpfe.max(), 20),
np.linspace(avg_egg_price_dozen.min(), avg_egg_price_dozen.max(), 20))
# Predict the target variable for the meshgrid points
X_plot = np.column_stack((CPI_mpfe_plot.ravel(), avg_egg_price_dozen_plot.ravel()))
CALM_gross_profits_millions_plot = model.predict(X_plot)
# Create a 3D plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(CPI_gas, avg_price_gas_gallon, XOM_gross_profits_millions, color='blue', label='Data')
tri = ax.plot_trisurf(CPI_mpfe, avg_egg_price_dozen, CALM_gross_profits_millions, cmap='viridis', alpha=0.5)
tri.set_edgecolor('k')
ax.set_xlabel('CPI MPFE')
ax.set_ylabel('Average Price of Eggs per Dozen')
ax.set_zlabel('Cal-Maine Foods Gross Profits')
plt.show()